import warnings
warnings.filterwarnings('ignore')
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
#visualization libraries
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.style as style
import matplotlib.pyplot as plt
from matplotlib import colors
import matplotlib.pyplot as plt, numpy as np
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import ListedColormap
from IPython.display import Image
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
import plotly.offline as pyo
from plotly import tools
import seaborn as sns
import missingno as msno #to visualize missing data
from imblearn.over_sampling import SMOTE
import itertools
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix,precision_score,recall_score,roc_auc_score,f1_score,plot_confusion_matrix,plot_roc_curve,roc_curve
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
# Print the current working directory. The original `pwd` is an IPython shell
# magic and raises NameError in plain Python; os.getcwd() is the portable form.
import os
print(os.getcwd())
For this project, we will analyze a credit card approval dataset. Financial institutions receive many credit card applications daily, and many factors contribute to a person being approved or denied. Reviewing these factors by hand is time-consuming and error-prone, but the task can be done easily with the power of machine learning. We will build a model that predicts whether an applicant is a 'good' or 'bad' applicant.
# Load the two raw datasets: monthly credit-card status records (CC) and
# applicant demographic records (AR). Paths are local to the author's machine.
DATA_DIR = "/Users/juliet/Dropbox/My Mac (Juliet’s MacBook Air)/Downloads"
CC = pd.read_csv(DATA_DIR + "/credit_record.csv")
CC.head()
AR = pd.read_csv(DATA_DIR + "/application_record.csv")
AR.head()
The number of unique clients does not match the number of rows, which means there are duplicate entries.
Analyze the Missing Values
# Visualize missingness in the application records; each gap is a missing cell.
ax_missing_ar = msno.matrix(AR)
ax_missing_ar.set_title("Missing data for Application Records dataset", fontsize=30)
Occupation type is missing 30% of its values.
# Same missingness check for the credit records dataset.
ax_missing_cc = msno.matrix(CC)
ax_missing_cc.set_title("Missing data for Credit Records dataset", fontsize=30)
Analyze Unique Counts
# Distinct-value counts per column for both datasets; low counts flag
# categorical columns worth encoding.
def _nunique_table(frame):
    # One (column, n_unique) row per column, sorted by uniqueness.
    records = [(col, frame[col].nunique()) for col in frame.columns]
    return pd.DataFrame.from_records(
        records, columns=['Column_Name', 'Num_Unique']).sort_values(by=['Num_Unique'])

counts1 = _nunique_table(AR)
counts2 = _nunique_table(CC)
counts2
Data Visualization
# Global seaborn styling used throughout the EDA section.
sns.set_context(
    "notebook",
    font_scale=.7,
    rc={"grid.linewidth": 0.1,
        'patch.linewidth': 0.0,
        "axes.grid": True,
        "grid.linestyle": "-",
        "axes.titlesize": 13,
        "figure.autolayout": True},
)
palette1 = ['#FF5E5B', '#EC9B9A', '#00CECB', '#80DE99', '#C0E680', '#FFED66']
sns.set_palette(sns.color_palette(sns.color_palette(palette1)))
plt.figure(figsize=(10, 10))
# Histograms of the main numeric application columns.
plot1 = ["CNT_CHILDREN", "AMT_INCOME_TOTAL", "DAYS_BIRTH", "DAYS_EMPLOYED"]
AR[plot1].hist(edgecolor='black', linewidth=1.2)
fig = plt.gcf()
fig.set_size_inches(12, 6)
We can see that there are outliers in children count and amount of total income.
# Side-by-side count plots: income type and family status distributions.
fig, axes = plt.subplots(1, 2)
for ax, col, title in (
        (axes[0], "NAME_INCOME_TYPE", "Customer Distribution by Income Type"),
        (axes[1], "NAME_FAMILY_STATUS", "Customer Distribution by Family Status")):
    bars = sns.countplot(y=AR[col], linewidth=1.2, ax=ax)
    bars.set_title(title)
    bars.set_xlabel("Count")
fig.set_size_inches(14, 5)
plt.tight_layout()
plt.show()
# Pie charts for the three binary demographic flags.
fig, axes = plt.subplots(1, 3)
pie_specs = (
    ('CODE_GENDER', "Customer Distribution by Gender", ["#76B5B3", "#EC9B9A"]),
    ('FLAG_OWN_CAR', "Car Ownership", ["#80DE99", "#00CECB"]),
    ('FLAG_OWN_REALTY', "Realty Ownership", ["#76B5B3", "#00CECB"]),
)
for ax, (col, title, shades) in zip(axes, pie_specs):
    wedges = AR[col].value_counts().plot.pie(
        explode=[0.1, 0.1], autopct='%1.1f%%', shadow=True,
        colors=shades, textprops={'fontsize': 12}, ax=ax)
    wedges.set_title(title)
fig.set_size_inches(14, 5)
plt.tight_layout()
plt.show()
Now, we will clean and prepare the raw data to enable feature engineering.
# Deduplicate applicants: keep the most recent row per ID.
AR = AR.drop_duplicates('ID', keep='last')
# OCCUPATION_TYPE is ~30% missing, so drop the column entirely.
AR.drop('OCCUPATION_TYPE', axis=1, inplace=True)
# Re-check uniqueness for the remaining object (string) columns only.
column1 = AR.columns[AR.dtypes == 'object'].tolist()
object_frame = AR[column1]
counts2 = pd.DataFrame.from_records(
    [(col, object_frame[col].nunique()) for col in object_frame.columns],
    columns=['Column_Name', 'Num_Unique']).sort_values(by=['Num_Unique'])
counts2  # unique counts for object columns
# Friendlier column names for the rest of the notebook.
AR.rename(columns={"CODE_GENDER": "Gender",
                   "FLAG_OWN_CAR": "Own_Car",
                   "FLAG_OWN_REALTY": "Own_Realty",
                   "CNT_CHILDREN": "Children_Count",
                   "AMT_INCOME_TOTAL": "Income",
                   "NAME_EDUCATION_TYPE": "Education",
                   "NAME_FAMILY_STATUS": "Family_Status",
                   "NAME_HOUSING_TYPE": "Housing_Type",
                   "DAYS_BIRTH": "Birthday",
                   "DAYS_EMPLOYED": "Employment_Date",
                   "FLAG_MOBIL": "Own_Mobile",
                   "FLAG_WORK_PHONE": "Own_Work_Phone",
                   "FLAG_PHONE": "Own_Phone",
                   "FLAG_EMAIL": "Own_Email",
                   "CNT_FAM_MEMBERS": "Family_Member_Count",
                   "NAME_INCOME_TYPE": "Income_Type"}, inplace=True)
# Earliest month on record per client (months are relative offsets, so the
# minimum marks when the account was opened); attach it to the applicant data.
open1 = CC.groupby("ID")["MONTHS_BALANCE"].min().reset_index(name="begin_month")
indiv = pd.merge(AR, open1, how="left", on="ID")
# Convert the binary / small categorical features into integer codes.
indiv["Gender"] = indiv['Gender'].replace({'F': 0, 'M': 1})
indiv["Own_Car"] = indiv["Own_Car"].replace({"Y": 1, "N": 0})
indiv["Own_Realty"] = indiv["Own_Realty"].replace({"Y": 1, "N": 0})
# Employed categories map to 1; pensioners and students to 0.
indiv["Is_Working"] = indiv["Income_Type"].replace(
    {"Working": 1, "Commercial associate": 1, "State servant": 1,
     "Pensioner": 0, "Student": 0})
# Married or civil marriage -> 1; single, separated, widowed -> 0.
indiv["In_Relationship"] = indiv["Family_Status"].replace(
    {"Civil marriage": 1, "Married": 1, "Single / not married": 0,
     "Separated": 0, "Widow": 0})
# Collapse sparse categories into coarser buckets before one-hot encoding.
# All specific apartment types count as an ordinary house/apartment; only
# living with parents stays distinct.
housing = {name: 'House / apartment'
           for name in ('House / apartment', 'Municipal apartment',
                        'Rented apartment', 'Office apartment', 'Co-op apartment')}
housing['With parents'] = 'With parents'
indiv["Housing_Type"] = indiv['Housing_Type'].map(housing)
# Family status reduces to a two-way Married / Single split.
household = {'Single / not married': 'Single',
             'Separated': 'Single',
             'Widow': 'Single',
             'Civil marriage': 'Married',
             'Married': 'Married'}
indiv["Family_Status"] = indiv["Family_Status"].map(household)
# Education reduces to secondary / higher / academic degree.
education = {'Secondary / secondary special': 'secondary',
             'Lower secondary': 'secondary',
             'Higher education': 'Higher education',
             'Incomplete higher': 'Higher education',
             'Academic degree': 'Academic degree'}
indiv["Education"] = indiv["Education"].map(education)
# Income type reduces to Working / Pensioner / Student.
income = {'Commercial associate': 'Working',
          'State servant': 'Working',
          'Working': 'Working',
          'Pensioner': 'Pensioner',
          'Student': 'Student'}
indiv["Income_Type"] = indiv["Income_Type"].map(income)
# Household size = children plus 2 adults if partnered, otherwise 1.
indiv["Household_Size"] = indiv["Children_Count"] + indiv["In_Relationship"].apply(
    lambda flag: 2 if flag == 1 else 1)
# Birthday/Employment_Date are negative day counts relative to the application
# date, so negate and scale to years.
indiv["Age"] = round((indiv.Birthday / 365) * -1)
indiv["Experience"] = indiv.Employment_Date / 365
# A non-negative value marks "not currently employed" -> zero years of experience.
indiv['Experience'] = indiv['Experience'].apply(lambda years: int(-years) if years < 0 else 0)
indiv = indiv.drop(columns=['Employment_Date', 'Birthday', 'Children_Count'])
# One-hot encode the remaining categoricals.
indiv = pd.get_dummies(indiv, columns=['Income_Type', 'Education', 'Family_Status', "Housing_Type"])
indiv.head()
# Box plots of the numeric features to inspect outliers before filtering.
column2 = ["Income", "Age", "Experience", "Household_Size"]
fig = make_subplots(rows=2, cols=2, start_cell="bottom-left",
                    subplot_titles=("Income", "Age", "Experience", "Family Member Count"))
box_layout = (
    (indiv.Income, 'Income', 1, 1),
    (indiv.Age, 'Age', 1, 2),
    (indiv.Experience, 'Experience', 2, 1),
    (indiv.Household_Size, "Family Member Count", 2, 2),
)
for series, label, r, c in box_layout:
    fig.add_trace(go.Box(x=series, name=label, boxmean=True), row=r, col=c)
fig.show()
The box plots above display the outliers in the children count, family member count, income and employment columns. In order to create an accurate model we need to remove them by using z-scores.
def calculate_z_scores(df, cols):
    """Append a "<col>_z_score" column for each column in `cols`.

    The z-score is (value - mean) / sample standard deviation. Mutates `df`
    in place and also returns it for convenience.

    Parameters:
        df: DataFrame to annotate (modified in place).
        cols: iterable of numeric column names present in `df`.
    """
    for col in cols:
        series = df[col]  # hoisted: the original looked the column up three times
        df[col + "_z_score"] = (series - series.mean()) / series.std()
    return df
# Score the heavy-tailed columns, then drop rows more than 3.5 sample standard
# deviations from the mean in any of them.
deg = calculate_z_scores(df=indiv, cols=["Income", "Experience", "Household_Size"])
R1 = deg.Household_Size_z_score.abs() <= 3.5
R2 = deg.Experience_z_score.abs() <= 3.5
R3 = deg.Income_z_score.abs() <= 3.5
# Filter and drop the helper columns in one expression: the original called
# drop(inplace=True) on a filtered slice, which triggers SettingWithCopy and
# is unreliable under pandas copy-on-write.
app1 = deg[R1 & R2 & R3].drop(
    columns=["Income_z_score", "Experience_z_score", "Household_Size_z_score"])
# Re-draw the box plots on the filtered data to confirm the outliers are gone.
column3 = ["Income", "Age", "Experience", "Family_Member_Count"]
fig = make_subplots(rows=2, cols=2, start_cell="bottom-left",
                    subplot_titles=("Income", "Age", "Experience", "Family Member Count"))
filtered_boxes = (
    (app1.Income, 'Income', 1, 1),
    (app1.Age, 'Age', 1, 2),
    (app1.Experience, 'Experience', 2, 1),
    (app1.Household_Size, "Family Member Count", 2, 2),
)
for series, label, r, c in filtered_boxes:
    fig.add_trace(go.Box(x=series, name=label, boxmean=True), row=r, col=c)
fig.show()
# Flag clients who were ever seriously past due (STATUS codes 2-5) in any month.
# .loc with a boolean mask replaces the original chained indexing
# (CC['dep_value'][mask] = ...), which depends on suppressed SettingWithCopy
# warnings and silently stops working under pandas copy-on-write.
CC['dep_value'] = None
CC.loc[CC['STATUS'].isin(['2', '3', '4', '5']), 'dep_value'] = 'Yes'
# groupby-count tallies the non-null dep_value months per client; any hit -> 'Yes'.
check = CC.groupby('ID').count()
check['dep_value'] = np.where(check['dep_value'] > 0, 'Yes', 'No')
check = check[['dep_value']]
# Data to analyze length of time since initial approval of credit card.
# Shows number of past dues, paid off and no loan status.
group = CC.groupby('ID')
pivot1 = CC.pivot(index='ID', columns='MONTHS_BALANCE', values='STATUS')
# Remember which columns are the month columns BEFORE appending summary
# columns, instead of relying on the fragile hard-coded iloc[:, 0:61] slice.
month_cols = pivot1.columns.tolist()
pivot1['open_month'] = group['MONTHS_BALANCE'].min()
pivot1['end_month'] = group['MONTHS_BALANCE'].max()
pivot1['window'] = pivot1['end_month'] - pivot1['open_month'] + 1  # +1: months start at 0
# Count each status code across the client's months:
# C = paid off, X = no loan, '0'-'5' = increasingly late past-due buckets.
status_counts = {
    'paid_off': 'C',
    'pastdue_1-29': '0',
    'pastdue_30-59': '1',
    'pastdue_60-89': '2',
    'pastdue_90-119': '3',
    'pastdue_120-149': '4',
    'pastdue_over_150': '5',
    'no_loan': 'X',
}
for col_name, code in status_counts.items():
    pivot1[col_name] = pivot1[month_cols].eq(code).sum(axis=1)
# Materialize ID as a column for the upcoming merge with the application data.
pivot1['ID'] = pivot1.index
pivot1.head()
# Per-client behavioral summary: paid-off months, total past-due months,
# and no-loan months, keyed by ID for merging.
plot3 = pd.DataFrame({'ID': pivot1.index, 'paid_off': pivot1['paid_off'].values})
pastdue_cols = ['pastdue_1-29', 'pastdue_30-59', 'pastdue_60-89',
                'pastdue_90-119', 'pastdue_120-149', 'pastdue_over_150']
plot3['#_of_pastdues'] = pivot1[pastdue_cols].sum(axis=1).values
plot3['no_loan'] = pivot1['no_loan'].values
# Attach the summary and the Yes/No delinquency label, then binarize the label.
app2 = app1.merge(plot3, how='inner', on='ID')
app3 = pd.merge(app2, check, how='inner', on='ID')
app3['target'] = app3['dep_value']
app3.loc[app3['target'] == 'Yes', 'target'] = 1
app3.loc[app3['target'] == 'No', 'target'] = 0
app3.drop(columns=["dep_value"], inplace=True)
# Reset matplotlib styling, then draw a lower-triangle correlation heatmap.
matplotlib.rcParams.update(matplotlib.rcParamsDefault)
f, ax = plt.subplots(figsize=(15, 15))
diverging_cmap = sns.diverging_palette(230, 20, as_cmap=True)
# Own_Mobile is excluded from the correlation (presumably near-constant — verify).
corr = app3.drop(columns=["Own_Mobile"]).corr().round(1)
upper_mask = np.triu(np.ones_like(corr, dtype=bool))  # hide the redundant upper triangle
sns.heatmap(corr, annot=True, mask=upper_mask, cmap=diverging_cmap)
# Class balance of the target.
app3['target'].value_counts().plot.pie(
    explode=[0.1, 0.1], autopct='%1.1f%%', shadow=True,
    colors=['#FF5E5B', '#C0E680'],
    textprops={'fontsize': 7}).set_title("Target distribution")
plt.show()
# Restore the notebook seaborn styling after the rcParams reset above.
sns.set_context(
    "notebook",
    font_scale=.7,
    rc={"grid.linewidth": 0.1,
        'patch.linewidth': 0.0,
        "axes.grid": True,
        "grid.linestyle": "-",
        "axes.titlesize": 13,
        'figure.figsize': (15, 15)},
)
palette2 = ['#FF5E5B', '#EC9B9A', '#00CECB', '#80DE99', '#C0E680', '#FFED66']
sns.set_palette(sns.color_palette(sns.color_palette(palette2)))
# Boxen plots of the main numeric features split by target class.
fig, axes = plt.subplots(1, 3)
for ax, col, title in ((axes[0], 'Income', "Income-Target"),
                       (axes[1], 'Age', "Age-Target"),
                       (axes[2], 'Experience', "Work Experience-Target")):
    panel = sns.boxenplot(x='target', y=col, data=app3,
                          palette=['#FF5E5B', '#C0E680'], ax=ax)
    panel.set_title(title)
fig.set_size_inches(14, 5)
plt.tight_layout()
# KDE distribution of each feature, faceted by target class and colored by
# working status; the same plot settings apply to every feature.
kde_kwargs = dict(hue="Is_Working", col='target', kind="kde", height=4,
                  facet_kws={'sharey': False, 'sharex': False},
                  palette=['#C70039', '#80DE99'])
for feature in ('Income', 'Age', 'Experience', 'begin_month',
                'no_loan', '#_of_pastdues', 'paid_off'):
    sns.displot(data=app3, x=feature, **kde_kwargs)
app3.head()
Feature Selection: Feature Selection is a feature engineering component that involves the removal of irrelevant features and picks the best set of features to train a robust machine learning model.
Calculating the Weight of Evidence (WoE): the weight of evidence tells the predictive power of an independent variable in relation to the dependent variable.
def calc_iv(df, feature, target, pr=False):
    """Compute Information Value (IV) and the per-value WoE table for `feature`.

    Fills NaNs in `feature` with the string "NULL" (mutating `df`), then for
    each distinct value tallies total / good (target == 0) / bad (target == 1)
    rows, derives the Weight of Evidence ln(P(value|good) / P(value|bad)) and
    each value's IV contribution.

    Parameters:
        df: source DataFrame (its `feature` column is NaN-filled in place).
        feature: column whose predictive power is being scored.
        target: binary target column (0 = good, 1 = bad).
        pr: when True, print the table and the total IV.

    Returns:
        (iv, data): total Information Value and the per-value breakdown,
        sorted by variable and value.
    """
    df[feature] = df[feature].fillna("NULL")
    rows = []
    # Iterate the distinct values once; the original rebuilt
    # list(df[feature].unique()) on every loop pass (quadratic work).
    for val in df[feature].unique():
        is_val = df[feature] == val
        rows.append([feature,
                     val,
                     int(is_val.sum()),                            # All
                     int((is_val & (df[target] == 0)).sum()),      # Good
                     int((is_val & (df[target] == 1)).sum())])     # Bad
    data = pd.DataFrame(rows, columns=['Variable', 'Value', 'All', 'Good', 'Bad'])
    data['Share'] = data['All'] / data['All'].sum()
    data['Bad Rate'] = data['Bad'] / data['All']
    data['Distribution Good'] = (data['All'] - data['Bad']) / (data['All'].sum() - data['Bad'].sum())
    data['Distribution Bad'] = data['Bad'] / data['Bad'].sum()
    # WoE is infinite when one class never sees the value; zero those out.
    data['WoE'] = np.log(data['Distribution Good'] / data['Distribution Bad'])
    data = data.replace({'WoE': {np.inf: 0, -np.inf: 0}})
    data['IV'] = data['WoE'] * (data['Distribution Good'] - data['Distribution Bad'])
    data = data.sort_values(by=['Variable', 'Value'], ascending=[True, True])
    data.index = range(len(data.index))
    iv = data['IV'].sum()
    if pr:
        print(data)
        print('IV = ', iv)
    return iv, data
# Score every candidate feature (all columns except the trailing target) by IV.
features = app3.columns.tolist()[:-1]
iv_list = [round(calc_iv(app3, feature, 'target')[0], 4) for feature in features]
df2 = pd.DataFrame(np.column_stack([features, iv_list]),
                   columns=['Feature', 'iv'])
df2
# Split the frame into predictors and target (target is the last column).
x = app3.loc[:, app3.columns != 'target']
y = app3.iloc[:, -1:]
X = x
Feature Scaling
from sklearn.preprocessing import StandardScaler

# Standardize every feature to zero mean / unit variance.
scaler = StandardScaler()
scaler.fit(x)
# columns=x.columns, not [x.columns]: the original wrapped the Index in a
# list, which builds a spurious single-level MultiIndex on the scaled frame.
X = pd.DataFrame(scaler.transform(x), columns=x.columns)
SMOTE (Synthetic Minority Oversampling Technique)
Imbalanced classification involves developing predictive models on classification datasets that have a severe class imbalance.
The challenge of working with imbalanced datasets is that most machine learning techniques will ignore, and in turn have poor performance on, the minority class, although typically it is performance on the minority class that is most important.
One approach to addressing imbalanced datasets is to oversample the minority class. The simplest approach involves duplicating examples in the minority class, although these examples don’t add any new information to the model. Instead, new examples can be synthesized from the existing examples.
# Oversample the minority class with SMOTE so both target classes are
# equally represented before training.
y = y.astype('int')
oversampler = SMOTE()
X_balance, Y_balance = oversampler.fit_resample(X, y)
X_balance = pd.DataFrame(X_balance, columns=X.columns)
Value counts are now balanced.
RFE (Recursive Feature Elimination)
Recursive Feature Elimination is effective at selecting the features (columns) in a training dataset that are more or most relevant in predicting the target variable.
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

# Rank features by recursively eliminating the weakest under a logistic model.
cols = app3.loc[:, app3.columns != 'target'].columns.tolist()
model = LogisticRegression(solver='liblinear')
rfe = RFE(model)
fit = rfe.fit(X_balance, Y_balance)
rfe_features = pd.DataFrame({
    "Feature": cols,
    "Support_LogisticRegression": fit.support_,
    "Feature_Rank_logisticRegression": fit.ranking_,
})
rfe_features
Extra Tree Classifier
This class implements a meta estimator that fits a number of randomized decision trees (a.k.a. extra-trees) on various sub-samples of the dataset and use averaging to improve the predictive accuracy and control over-fitting.
from sklearn.ensemble import ExtraTreesClassifier

# Impurity-based feature importances from a small randomized-trees ensemble.
model = ExtraTreesClassifier(n_estimators=10)
model.fit(X_balance, Y_balance)
feature_importances = pd.DataFrame({
    "Feature": cols,
    "Feature_Importance_ExtratreeClassifier": model.feature_importances_,
})
Results from Feature Selection Methods
# Combine the IV scores, tree importances and RFE ranks into one comparison table.
feature_selection_df = df2.merge(feature_importances, on=["Feature"]).merge(rfe_features, on=["Feature"])
feature_selection_df
The selected features below were chosen from the results above.
# Keep the features that scored well across all three selection methods.
selected_features = ["paid_off", "begin_month", "#_of_pastdues", "no_loan",
                     "Income", "Experience", "Education_Higher education",
                     "Education_secondary", "Own_Realty", "Family_Status_Single",
                     "Family_Member_Count", "Is_Working", "Own_Car", "Age"]
X_balance = X_balance[selected_features]
from sklearn.model_selection import train_test_split
# 70/30 split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X_balance, Y_balance, random_state=100, test_size=0.3)
print(X_train.shape)
The Model
# Candidate models keyed by display name. Hyper-parameter choices are
# hand-picked for this notebook; the loop below fits and scores each one.
classifiers = {
"LogisticRegression" : LogisticRegression(),
"KNeighbors" : KNeighborsClassifier(),
# Linear-kernel SVM; probability=True enables predict_proba for the ROC curves.
"SVC" : SVC(C = 0.8,kernel='linear',probability=True),
"DecisionTree" : DecisionTreeClassifier(),
"RandomForest" : RandomForestClassifier(n_estimators=250,max_depth=12,min_samples_leaf=16),
"XGBoost" : XGBClassifier(max_depth=12,
n_estimators=250,
min_child_weight=8,
subsample=0.8,
learning_rate =0.02,
seed=42),
# verbose=25: CatBoost logs progress every 25 iterations during fit.
"CatBoost" : CatBoostClassifier(iterations=250,
learning_rate=0.2,
od_type='Iter',
verbose=25,
depth=16,
random_seed=42)
}
# Fit every classifier and collect accuracy/precision/recall/F1 plus the ROC
# inputs (fpr, tpr, auc) for later plotting. Rows are gathered in a list and
# turned into a DataFrame at the end: DataFrame.append is deprecated and was
# removed in pandas >= 2.0. Also fixes the misspelled 'presicion' column name
# (later cells only access columns positionally, so the rename is safe).
rows = []
for key, classifier in classifiers.items():
    classifier.fit(X_train, y_train)
    y_predict = classifier.predict(X_test)
    yproba = classifier.predict_proba(X_test)[:, 1]  # probability of the positive class
    fpr, tpr, _ = roc_curve(y_test, yproba)
    auc = roc_auc_score(y_test, yproba)
    rows.append({'classifiers': key,
                 'accuracy': accuracy_score(y_test, y_predict),
                 'precision': precision_score(y_test, y_predict, average='weighted'),
                 'recall': recall_score(y_test, y_predict, average='weighted'),
                 'f1_score': f1_score(y_test, y_predict, average='weighted'),
                 'fpr': fpr,
                 'tpr': tpr,
                 'auc': auc})
result_table = pd.DataFrame(rows, columns=['classifiers', 'accuracy', 'precision',
                                           'recall', 'f1_score', 'fpr', 'tpr', 'auc'])
result_table.set_index('classifiers', inplace=True)
Results
# One confusion matrix per fitted classifier. sklearn removed
# plot_confusion_matrix in 1.2; ConfusionMatrixDisplay.from_estimator is the
# supported replacement with the same estimator/X/y signature.
from sklearn.metrics import ConfusionMatrixDisplay

fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(15, 10))
for cls, ax in zip(list(classifiers.values()), axes.flatten()):
    ConfusionMatrixDisplay.from_estimator(cls, X_test, y_test, ax=ax, cmap='Blues')
    ax.title.set_text(type(cls).__name__)
plt.tight_layout()
plt.show()
# Overlay every model's ROC curve; the dashed diagonal is the random baseline.
fig = plt.figure(figsize=(8, 6))
for name in result_table.index:
    plt.plot(result_table.loc[name]['fpr'],
             result_table.loc[name]['tpr'],
             label="{}, AUC={:.3f}".format(name, result_table.loc[name]['auc']))
plt.plot([0, 1], [0, 1], color='orange', linestyle='--')
plt.xticks(np.arange(0.0, 1.1, step=0.1))
plt.xlabel("False Positive Rate", fontsize=15)  # fixed typo: was "Flase"
plt.yticks(np.arange(0.0, 1.1, step=0.1))
plt.ylabel("True Positive Rate", fontsize=15)
plt.title('ROC Curve Analysis', fontweight='bold', fontsize=15)
plt.legend(prop={'size': 13}, loc='lower right')
plt.show()
From the ROC curve above, we can see that the model created with the CatBoost classifier returns the highest AUC. An AUC close to 1 indicates a very good measure of separability.
# Display only the four scalar metric columns (dropping fpr/tpr/auc arrays).
result_table.iloc[:, :4]
The results from the table above agree with our ROC curve, the model created by using the CATBoost Classifier returns the highest accuracy.